org 100h   ; DOS .COM image; the code assumes entry state ax=bx=0 si=0x100 di=sp=-2

; With ax=0 assumed, this makes ax=0x0013, so the first int 0x10 at P
; is the BIOS "set video mode 0x13" call.
  mov al,0x13

;Palette: 8 color gradients [21$]
;Roles for the BIOS palette call: bx=index dh=R ch=G cl=B
;Each pass derives a colour from bl and then decrements bx, so the
;int 0x10 at the top of the next pass is what applies that colour.
P int 0x10     ; 1st pass: set 320x200 mode; later passes: ax=0x1010 sets one palette entry
  cwd          ; dx=0 (ax is non-negative here), so R = 0
  mov cl,bl
  shr cl,1     ; B = i>>1
  mov ch,cl
  shr ch,1     ; G = i>>2; CF = bit 1 of i
  jc Q         ; bit 1 of i set: keep R=0 and B=i>>1
  xchg dh,cl   ; bit 1 of i clear: swap R<->B
Q mov ax,0x1010; function selector for the next int 0x10 (set palette index)
  dec bx
  jnz P        ; repeat until bx wraps back to 0 (bx=0 on exit)

  fninit
;  fild word[byte si-0x100+K]

;be 28 = 10430 = 65536/2pi
;The first two bytes of the instruction at K (opcode 0xbe, then the low
;immediate byte 0x28) read as a little-endian word are 0x28be = 10430.
;The table loop below uses that word as its divisor, so the low byte of
;this immediate cannot be changed freely.
K mov si,0x4028; si=0x4028: sin->cos phase (an exact quarter turn would be 0x4000)
  mov ds,si    ; ds=0x4028: table segment

  push 0xa000 - 160/16 ; pushes 0x9ff6; popped into es once the table is built
  mov bp,sp    ; sp=bp=-4

;Sine table: 16384 float32 entries (-1..1)
;The entry for angle t (bx, a multiple of 4) is stored at ds:[t+0x4028], so a
;read at ds:[t+si] yields cos(t), while a read at ds:[t] lands 0x4028 bytes
;earlier (roughly a quarter turn), which later code uses as sin(t).
S mov [bp+si],bx    ; spill t to a scratch word (bp-based, so in ss)
  fild word[bp+si]  ;| t (loaded as a signed 16-bit integer)
;  fdiv st1         ;| T=t/65536*2pi
  fidiv word[ss:K]  ;| T=t/10430; ss override because ds now points at the table
                    ;  (assumes ss still addresses the program image, as at .COM entry)
  fcos              ;| cosT
  fstp dword[bx+si] ;| to ds:[bx+0x4028]
  sub bx,sp    ; sp=-4, so bx += 4
  jnz S        ; until bx wraps to 0

  pop es       ; es=0x9ff6: centered screen segment

M:

;For each pixel: find dX,dY,dZ and initialize X,Y,Z
;bx=time di=pixel_address si=0x4028
X mov ax,0xcccd
  mul di       ; dx:ax=di*0xcccd; 0xcccd/65536 ~ 256/320, so dh ~ di/320 (row)
               ; and dl ~ position within the row scaled to 0..255
  mov cl,112   ; cl=dZ=0x70??  (cx is also the march countdown used further down)
  sub dh,cl    ; dh=dY (centered), dl=dX bx=T=time
  pusha        ; -10 -9 -8 -7 -6 -5 -4 -3
               ;  bl bh dl dh cl ch al ah
               ;     ( dX )
               ;        ( dY )
               ;           ( dZ )
               ; (stack offsets assume sp=-2 before the pusha; the three
               ;  direction words overlap each other by one byte)

  mov ax,si    ; ax=X=0x4028
  cwd          ; dx=Y=0

  mov di,es    ; di=0x9ff6; di serves as a parity-based loop counter until popa restores it

; Load dX,dY,dZ and rotate dX,dZ by time
; The loop body runs four times, driven by two nested toggles:
;  - xor si,ax flips si between 0x4028 and 0 (ax equals si on entry), so each
;    component is multiplied first by ds:[bx+si] ("c") and then by ds:[bx] ("s");
;  - xor bp,2 flips bp between -4 and -2: the low byte 0xfe has odd parity
;    (jpo loops again, now loading dZ), 0xfc has even parity (fall through).
                    ;  bp=-4     bp=-2
L fild word[bp-5]   ;| dX      | dZ      |
  fmul dword[bx+si]
  xor si,ax
  jz L
;  fild word[bp-5]
;  fmul dword[bx]
  xor bp,2
  jpo L             ;| dZ*s dZ*c dX*s dX*c ; si=0x4028 bp=-4
  fsubp st3,st0
  faddp             ;| dX=dZ*c+dX*s dZ=dX*c-dZ*s
  fild word[bp-4]   ;| dY dX dZ

;  fild word[bp-3]
;  fild word[bp-5]
;  fild word[bp-4]

  shl bx,3     ; bx=Z=time*8

Z:

;Compute the distance to the gyroid
;Three passes; the two xchg rotate (ax,dx,bx) so each coordinate takes a turn
;in bx, and after the third pass ax=X dx=Y bx=Z again.
;The pass count comes from parity: di's low byte goes 0xf6->0xf7->0xf8->0xf9
;and only 0xf9 has even parity, which ends the jpo loop.
G fld dword[bx+si]
  xchg ax,dx   ;| cosZ | cosY | cosX
  xchg ax,bx   ; ax=X dx=Y bx=Z -> ax=Z dx=X bx=Y
  fmul dword[bx]
  inc di       ;| cosZ*sinY | cosY*sinX | cosX*sinZ
  jpo G        ; three passes; di = start+3 on exit
  faddp
  faddp        ;| d=cosZ*sinY+cosY*sinX+cosX*sinZ
  fist word[bp]; store d as an integer to [-4] (pushed ax); fist rounds per the
               ; FPU control word (round-to-nearest after fninit), it does not truncate
  fabs         ;| |d|
  fldl2e       ;| offset=1.442695 |d|
  fsubrp st1,st0;| D=offset-|d|
  fstp dword[bp+si] ; D as float32 into a scratch dword (bp-based, so in ss)

;Advance ray by distance
;Three passes again (di counts back down to its starting value). Each pass
;copies st0 to st3 and then pops the product, which rotates the FPU stack by
;one, while the xchg pair rotates ax/dx/bx; both are back in entry order on exit.
A fst st3      ;| dY dX dZ dY
  fmul dword[bp+si] ;| dY*D dX dZ dY
  fistp dword[bp+di] ; step as int32 into a scratch dword; FPU stack is now dX dZ dY
  xchg ax,dx   ; ax=X dx=Y bx=Z -> ax=Y dx=X bx=Z
  sar dword[bp+di],2 ; step /= 4 (arithmetic shift keeps the sign)
  add ax,[bp+di]; Y+=dY*D/4 | X+=dX*D/4 | Z+=dZ*D/4 (only the low 16 bits are added)
  and al,0xfc  ; align to a multiple of 4 (for sine table)
  xchg ax,bx   ; ax=Y dx=X bx=Z -> ax=Z dx=X bx=Y
  dec di
  jpo A        ; three passes; di is back at its starting value on exit

;Close enough?
  cmp byte[bp+si+3],0x3e ; top byte of the float32 D stored above
  jl E         ; hit if D<0.125 (= bits(D)<0x3e000000); signed compare, so a negative D also counts

  add cx,bp    ; cx -= 4 (bp=-4)
  jnz Z        ; max 28 iterations (cx starts at 112, assuming ch=0)

E fcompp       ; pop two of the three direction values (the comparison result is not used)
  fstp st0     ;| pop the third; FPU stack is empty again

;Draw pixel
  add [bp],cl  ; add cl (what is left of the march countdown) to the byte at [-4] (pushed al)
  popa         ; restores di (pixel address), bx (time), cx, dx; ax is read back from the modified slot
  stosb        ; al=color written to es:[di], di++; bx=time
  test di,di
  jnz X        ; next pixel until di wraps to 0

;Next frame
  inc bh       ; time++ (bx += 0x100)
  in al,0x60   ; esc check
  dec ax
  jnz M        ; keep running unless ax was 1 (al=1 with ah=0); presumably the Esc scancode - TODO confirm
  ret          ; presumably returns to DOS via the zero word on top of a .COM stack - TODO confirm

;c65536div2pi: dw 10430 ; 65536/2pi
;cDistFactor: dd 0.25   ; (0.75 (Lipschitz constant) / 44700 (avg dir length)) * 65536/2pi
;cOffset: dd 1.5

;; Palette test
;  push 0xa000
;  pop es
;  xor di,di
;  xor ax,ax
;Y stosb
;  inc al
;  jnz Y
;  add di,64
;  jns Y
;
;  xor ax,ax
;  int 0x16
;  ret

